#!/bin/bash

# Mirror a list of git repositories into an S3 bucket cache so later jobs
# can download a tarball of a bare clone instead of cloning from upstream.

set -Eeuo pipefail
shopt -s inherit_errexit

# expects the following variables defined in the environment:
# - BUCKET_CONFIG_NAME
# - BUCKET_CONFIG
# - REPOS
#
# If private GitLab repo access is required:
# - GITLAB_READ_REPO_TOKENS: json {"host": "token_name"}
# - Any variable specified by "token_name"

# shellcheck disable=SC1091
source cki_utils.sh

# Scratch directory reused (wiped and recreated) for every repository;
# removed on script exit via the trap below.
REPO_TEMPDIR="$(mktemp -d)"
trap 'rm -rf "$REPO_TEMPDIR"' EXIT

# Split the whitespace-separated REPOS value into an array of clone URLs.
# `read -d ''` always hits EOF (there is no NUL terminator) and returns
# non-zero, so the failure is expected and suppressed.
# shellcheck disable=SC2154
read -ra GIT_URLS -d '' <<< "${REPOS}" || true  # EOF expected

# Presumably sets AWS_ENDPOINT/AWS_BUCKET/AWS_BUCKET_PATH (and credentials)
# from the named bucket config — verify against cki_utils.sh.
# shellcheck disable=SC2154
cki_parse_bucket_spec "${BUCKET_CONFIG_NAME}"
# shellcheck disable=SC2154
AWS_S3=(aws s3 --endpoint "${AWS_ENDPOINT}")
# shellcheck disable=SC2154
S3_PATH="s3://${AWS_BUCKET}/${AWS_BUCKET_PATH}"
# One cached object name per line. The listing may legitimately fail or be
# empty (fresh bucket/path), hence the tolerated `|| true`.
REPOS_IN_S3="$( ("${AWS_S3[@]}" ls "${S3_PATH}" || true) | awk '{print $NF}')"

# Update the S3-cached mirror of a single git repository.
#
# The cache for "owner/repo" is stored as two objects:
#   owner.repo.tar      - tarball of a bare (--mirror) clone
#   owner.repo.refs.md5 - md5sum of `git ls-remote --refs` output, used to
#                         decide whether the cached tarball is stale
#
# Globals:   REPO_TEMPDIR, AWS_S3, S3_PATH, REPOS_IN_S3 (read)
#            GITLAB_READ_REPO_TOKENS (read, optional)
# Arguments: $1 - git clone URL
# Returns:   0 if the cache was up to date or successfully refreshed;
#            non-zero otherwise (set -e aborts on any failed step)
function update_repo {
    local GIT_URL GIT_URL_LC GIT_HOST GIT_REPO_PATH GIT_URL_PATH
    local GIT_URL_REPO GIT_URL_OWNER TOKEN_NAME REPO_TARBALL REPO_REFS
    local REPO_REFS_MD5 REPO_REFS_MD5_OLD
    GIT_URL="${1}"
    GIT_URL_LC="${GIT_URL,,}"                 # everything lowercase
    GIT_HOST="${GIT_URL_LC#*//}"              # strip protocol
    GIT_HOST="${GIT_HOST%%/*}"                # strip everything after hostname
    GIT_REPO_PATH="${GIT_URL_LC#*//*/}"       # strip protocol and host
    GIT_URL_PATH="${GIT_REPO_PATH%.git}"      # strip optional .git suffix
    GIT_URL_REPO="${GIT_URL_PATH##*/}"        # strip any directories
    if [[ "${GIT_URL_PATH}" == */* ]]; then
        GIT_URL_OWNER="${GIT_URL_PATH%/*}"    # strip repo
        GIT_URL_OWNER="${GIT_URL_OWNER##*/}"  # strip any parent directories
    else
        # default to "git" owner
        # this makes the following URLs compatible (cgit):
        # - git://host.com/repo.git
        # - http://host.com/git/repo.git
        GIT_URL_OWNER="git"
    fi

    # Check if we require auth. If so, retrieve the token and adjust the URL.
    TOKEN_NAME="$(jq --arg GIT_HOST "${GIT_HOST}" -r '.[$GIT_HOST] // empty' <<< "${GITLAB_READ_REPO_TOKENS:-}")"
    # Only configure credentials when a token name is mapped for this host
    # AND a variable with that name is actually set in the environment.
    if [[ -n "${TOKEN_NAME}" && -v "${TOKEN_NAME}" ]] ; then
        git config --global "credential.${GIT_URL}.username" oauth2
        git config --global "credential.${GIT_URL}.helper" "!echo password=\$${TOKEN_NAME}; :"
    fi

    # We store repositories as "owner.repo".
    REPO_TARBALL="${GIT_URL_OWNER}.${GIT_URL_REPO}.tar"
    REPO_REFS="${GIT_URL_OWNER}.${GIT_URL_REPO}.refs.md5"

    cki_echo_yellow "${GIT_URL_OWNER}/${GIT_URL_REPO}:"

    # Start from an empty scratch directory. The :? guard aborts instead of
    # running `rm -rf ""` should REPO_TEMPDIR ever be unset/empty.
    rm -rf -- "${REPO_TEMPDIR:?}"
    mkdir -p "${REPO_TEMPDIR}"

    # Download the cached refs and compare it to current upstream
    echo -n "  Checking ${REPO_REFS} in S3... "
    REPO_REFS_MD5="$(git ls-remote --refs "${GIT_URL}" | md5sum)"
    # -Fx: fixed-string, whole-line match — the dots in "owner.repo.refs.md5"
    # must not act as regex wildcards against other object names.
    if grep -qFx -- "${REPO_REFS}" <<< "${REPOS_IN_S3}"; then
        REPO_REFS_MD5_OLD="$("${AWS_S3[@]}" cp --no-progress "${S3_PATH}${REPO_REFS}" -)"
        cki_echo_green "found"
    else
        REPO_REFS_MD5_OLD="nonexistent"
        cki_echo_red "not found"
    fi

    # Download the old cache and extract it.
    echo -n "  Checking ${REPO_TARBALL} in S3... "
    if ! grep -qFx -- "${REPO_TARBALL}" <<< "${REPOS_IN_S3}"; then
        cki_echo_red "not found"
        echo "  Cloning repository..."
        git clone --mirror --quiet "${GIT_URL}" "${REPO_TEMPDIR}"
    elif [[ ${REPO_REFS_MD5_OLD} != "${REPO_REFS_MD5}" ]]; then
        cki_echo_red "outdated"
        echo "  Downloading ${REPO_TARBALL}..."
        "${AWS_S3[@]}" cp --no-progress "${S3_PATH}${REPO_TARBALL}" - | tar -xf - -C "${REPO_TEMPDIR}"
        echo "  Updating repository..."
        pushd "${REPO_TEMPDIR}" > /dev/null
            # keep gc in the foreground as tar gets unhappy if gc didn't finish before packing it up
            git config gc.autoDetach false
            git remote set-url origin "${GIT_URL}"
            git fetch --all --prune 2>&1 | sed 's/^/    /'
            git repack --geometric 2 -d
        popd > /dev/null
    else
        # Refs unchanged since the cached tarball was made: nothing to do.
        cki_echo_green "up to date"
        return 0
    fi

    # Stream the archive of the updated cache to S3. Compression saves maybe
    # 5-10% here but the CPU usage and time spent waiting isn't worth it.
    echo "  Uploading ${REPO_TARBALL}..."
    pushd "${REPO_TEMPDIR}" > /dev/null
        tar cf - . | "${AWS_S3[@]}" cp --no-progress - "${S3_PATH}${REPO_TARBALL}"
    popd > /dev/null
    rm -rf -- "${REPO_TEMPDIR:?}"

    # Update refs in S3 last, so an interrupted tarball upload leaves the
    # old refs object in place and forces a refresh on the next run.
    echo "  Uploading ${REPO_REFS}..."
    "${AWS_S3[@]}" cp --no-progress - "${S3_PATH}${REPO_REFS}" <<< "${REPO_REFS_MD5}"
}

# Attempt every update, tallying failures so that one broken repository
# does not prevent the remaining ones from being refreshed.
declare -i FAILED=0
# shellcheck disable=SC2154,SC2153
for GIT_URL in "${GIT_URLS[@]}"; do
    # Launch each update as a background job: set -e stays in effect inside
    # the subshell, while `wait` hands back the exit status here without
    # errexit killing the loop.
    update_repo "${GIT_URL}" &
    wait "$!" || FAILED+=1
done
if (( FAILED == 0 )); then
    cki_echo_green "All updates completed successfully."
else
    cki_echo_red "${FAILED} updates failed."
    exit 1
fi
